# Packages
library(rnoaa)
## The rnoaa package will soon be retired and archived because the underlying APIs have changed dramatically. The package currently works but does not pull the most recent data in all cases. A noaaWeather package is planned as a replacement but the functions will not be interchangeable.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(plotly)
## Registered S3 method overwritten by 'httr':
##   method           from  
##   print.cache_info hoardr
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(zoo)
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Set NOAA token
options(noaakey = "qFfIOinHumNIZRIeSPImtlVWdeOffDRS")

token <- getOption("qFfIOinHumNIZRIeSPImtlVWdeOffDRS")
# Function: fetching one year of daily TMAX data for a station
get_year_data <- function(year, stationid) {
  ncdc(datasetid = "GHCND",
       stationid = stationid,
       datatypeid = "TMAX",
       startdate = paste0(year, "-01-01"),
       enddate   = paste0(year, "-12-31"),
       limit     = 1000,
       token     = token)$data
}

# Fetching multiple years and combine
years <- 2015:2019
station_id <- "GHCND:USW00094728"  # NYC Central Park
nyc_list <- lapply(years, get_year_data, stationid = station_id)
nyc_daily <- bind_rows(nyc_list)

# Cleaning + parsing
nyc_temp <- nyc_daily %>%
  mutate(date = ymd_hms(.data$date),
         tmax_c = value / 10)

Daily temperatures

p1 <- ggplot(nyc_temp, aes(date, tmax_c)) +
  geom_line(color="firebrick") +
  labs(title="Daily Max Temp - NYC Central Park",
       y="°C", x="Date") +
  theme_minimal()
ggplotly(p1)

Monthly average temperatures

nyc_monthly <- nyc_temp %>%
  mutate(year = year(date), month = month(date)) %>%
  group_by(year, month) %>%
  summarize(tmax_c = mean(tmax_c, na.rm=TRUE), .groups="drop") %>%
  mutate(date = ymd(paste(year, month, 15)))

p2 <- ggplot(nyc_monthly, aes(date, tmax_c)) +
  geom_line(color="steelblue") +
  labs(title="Monthly Avg Max Temp - NYC",
       y="°C", x="Date") +
  theme_minimal()
ggplotly(p2)

Rolling averages (5-month and 12-month)

nyc_monthly <- nyc_monthly %>%
  arrange(date) %>%
  mutate(roll5 = rollmean(tmax_c, 5, fill=NA, align="right"),
         roll12 = rollmean(tmax_c, 12, fill=NA, align="right"))

p3 <- ggplot(nyc_monthly, aes(date)) +
  geom_line(aes(y=tmax_c), alpha=0.4) +
  geom_line(aes(y=roll5), color="darkorange", size=1) +
  geom_line(aes(y=roll12), color="darkgreen", size=1, linetype="dashed") +
  labs(title="NYC Max Temp (smoothed)",
       y="°C", x="Date") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplotly(p3)

Seasonal cycle + monthly distribution

p4 <- ggplot(nyc_monthly, aes(x=factor(month), y=tmax_c)) +
  geom_boxplot(fill="lightblue") +
  labs(title="Seasonal Cycle - NYC (2015–2019)",
       x="Month", y="Monthly Avg Max Temp (°C)") +
  theme_minimal()
ggplotly(p4)

CO₂ vs Temperature

nyc_monthly <- nyc_temp %>%
  mutate(year = year(date),
         month = month(date)) %>%
  group_by(year, month) %>%
  summarize(tmax_c = mean(tmax_c, na.rm=TRUE), .groups="drop") %>%
  mutate(year = as.integer(year),
         month = as.integer(month),
         date = ymd(paste(year, month, 15)))   # 15th day for monthly midpoint

co2 <- read_csv("https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.csv",
                comment="#",
                col_names=c("year","month","decimal","average","interpolated",
                            "trend","days")) %>%
  mutate(year = as.integer(year),
         month = as.integer(month),
         date = ymd(paste(year, month, 15)))
## Rows: 811 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): year, month, decimal, average, interpolated, trend, days, X8
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `year = as.integer(year)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
combined <- inner_join(nyc_monthly, co2, by=c("year","month","date"))

p5 <- ggplot(combined, aes(x=trend, y=tmax_c)) +
  geom_point(alpha=0.5) +
  geom_smooth(method="lm", color="red") +
  labs(title="NYC Temp vs Mauna Loa CO₂",
       x="CO₂ (ppm)", y="NYC Max Temp (°C)") +
  theme_minimal()
ggplotly(p5)
## `geom_smooth()` using formula = 'y ~ x'

Extremes (heat days >30°C)

nyc_extremes <- nyc_temp %>%
  filter(tmax_c > 30) %>%
  mutate(year = year(date)) %>%
  group_by(year) %>%
  summarize(hot_days = n(), .groups="drop")

p6 <- ggplot(nyc_extremes, aes(x=factor(year), y=hot_days)) +
  geom_col(fill="tomato") +
  labs(title="Hot Days >30°C per Year (NYC)",
       x="Year", y="Number of Days") +
  theme_minimal()
ggplotly(p6)
cat("
### Questions from Visualizations

1. **Is the frequency of >30°C days in NYC increasing?**  
2. **How tightly do NYC temperatures track rising CO₂ levels?**  
3. **Does the seasonal cycle show shifting onset of heat?**  
4. **Do rolling averages reveal accelerated warming after 2015?**  
5. **Are there noticeable trends in monthly temperature variability?**  
")
## 
## ### Questions from Visualizations
## 
## 1. **Is the frequency of >30°C days in NYC increasing?**  
## 2. **How tightly do NYC temperatures track rising CO₂ levels?**  
## 3. **Does the seasonal cycle show shifting onset of heat?**  
## 4. **Do rolling averages reveal accelerated warming after 2015?**  
## 5. **Are there noticeable trends in monthly temperature variability?**